#! /usr/bin/env python

# Step 1: run the database initialiser against the crawler's SQLite file.
# NOTE(review): presumably creates the schema when it is missing -- confirm
# in InitDB.
import InitDB
DB_FILE = "BotDB.sqlite"
db = InitDB.InitDB(DB_FILE)

# Step 2: rebind `db` to the accessor object for the same file.
import AccessDB
db = AccessDB.AccessDB(DB_FILE)
# Fetch the first URL from the database that has not been visited yet.
current_url = db.GetURLs(1)

# Step 3: build the page downloader for that URL. The second argument looks
# like a browser User-Agent string (Opera 10 on Windows XP).
import GetPage
USER_AGENT = "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.2.15 Version/10.00"
page = GetPage.GetPage(current_url, USER_AGENT)

import Parser

import sys

# Upper bound for the page counter: the loop runs while the counter is below
# this value, i.e. at most PAGE_LIMIT - 1 pages per run. (An earlier, disabled
# version of the loop used 100000 here.)
PAGE_LIMIT = 50

# Main crawl loop: download the current page, store its text, record the
# outgoing http:// links, then advance to the next URL from the database.
# The try/finally guarantees the database and the downloader are closed even
# when a download fails or the run is interrupted with Ctrl-C.
try:
    for page_count in range(1, PAGE_LIMIT):
        # Stop once the database hands back no URL.
        # NOTE(review): assumes GetURLs returns a falsy value when nothing is
        # left to crawl -- confirm against AccessDB.
        if not current_url:
            break
        print("%5d - %s" % (page_count, current_url))
        # Download the web page at 'current_url'.
        contents = page.DownloadPage()
        # Store the page text; HTML tags are removed before the insert.
        db.AddContents(current_url, Parser.RemoveHtmlTags(contents))
        try:
            urlparser = Parser.URLParser()
            urlparser.feed(contents)
            urlparser.close()
            for url in urlparser.urls:
                # Only absolute plain-http links are recorded.
                if url.startswith("http://"):
                    host = Parser.URLHost(url)
                    path = Parser.URLPath(url)
                    host_id = db.AddHost(host)
                    db.AddURL(host_id, path)
        except Exception as exc:
            # Link extraction stays best-effort: one bad page must not stop
            # the crawl, but the failure is reported instead of being
            # silently dropped. KeyboardInterrupt/SystemExit are not
            # Exception subclasses, so they still propagate.
            sys.stderr.write(
                "link extraction failed for %r: %r\n" % (current_url, exc))
        # Advance to the next URL from the database and point the downloader
        # at it (skipped when there is no next URL).
        current_url = db.GetURLs(1)
        if current_url:
            page.Reset(current_url)
finally:
    db.Close()
    page.Close()
